{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "336519da-4d2e-4de9-8c39-42cc67546ecd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xarray as xr"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "07d6c9ef-b880-4e32-ae6b-be746871244b",
   "metadata": {},
   "source": [
    "## ESRI data\n",
    "\n",
    "The most recent 2020 census tracts are used to aggregate the data. More info about each netCDF is below:\n",
    " \n",
    "- Daily Minimum temperature from 2006-2021  \n",
    "- Daily Maximum temperature from 2006-2021\n",
    "- Daily Relative Humidity from 2006-2021  \n",
    "- Daily Smoke from 2006-2021 \n",
    "- Daily PM 2.5 from 2006-2020"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a476f172-7479-43dd-8729-10b2dd8fa9b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "tmax = xr.open_dataset(\"data/wc/MaxTemp_daily_CensusTract_2005_2021_WesternUS.nc\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0d2a5ad9",
   "metadata": {},
   "outputs": [],
   "source": [
    "locs = tmax[[\"location_label\"]].to_dataframe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "16fcc399",
   "metadata": {},
   "outputs": [],
   "source": [
    "tmax = tmax[[\"MEAN_NONE_SPATIAL_NEIGHBORS\"]].to_dataframe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8360fcb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Kelvin to Celsius\n",
    "tmax[\"MEAN_NONE_SPATIAL_NEIGHBORS\"] = tmax[\"MEAN_NONE_SPATIAL_NEIGHBORS\"] - 273.15"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "498350a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>MEAN_NONE_SPATIAL_NEIGHBORS</th>\n",
       "      <th>lat</th>\n",
       "      <th>lon</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>locations</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2004-12-31</th>\n",
       "      <th>0</th>\n",
       "      <td>10.507685</td>\n",
       "      <td>36.740146</td>\n",
       "      <td>-109.854391</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.008235</td>\n",
       "      <td>36.752539</td>\n",
       "      <td>-109.366007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8.527029</td>\n",
       "      <td>35.955471</td>\n",
       "      <td>-109.145939</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8.570941</td>\n",
       "      <td>36.386734</td>\n",
       "      <td>-109.313362</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>9.604968</td>\n",
       "      <td>36.171561</td>\n",
       "      <td>-109.686426</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      MEAN_NONE_SPATIAL_NEIGHBORS        lat         lon\n",
       "time       locations                                                    \n",
       "2004-12-31 0                            10.507685  36.740146 -109.854391\n",
       "           1                            10.008235  36.752539 -109.366007\n",
       "           2                             8.527029  35.955471 -109.145939\n",
       "           3                             8.570941  36.386734 -109.313362\n",
       "           4                             9.604968  36.171561 -109.686426"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tmax.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3ce43f4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "tmax = tmax.reset_index()\n",
    "locs = locs.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "55dc2ab7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "112432572"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(tmax)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f3e4ed04",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Left-join the tract labels onto every row. validate=\"many_to_one\" makes pandas\n",
    "# raise MergeError if labelDim is duplicated in locs, so the join cannot silently add rows.\n",
    "df = tmax.merge(\n",
    "    locs,\n",
    "    left_on=\"locations\",\n",
    "    right_on=\"labelDim\",\n",
    "    how=\"left\",\n",
    "    validate=\"many_to_one\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e1053626",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "112432572"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "705564c6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>locations</th>\n",
       "      <th>tmax</th>\n",
       "      <th>lat</th>\n",
       "      <th>lon</th>\n",
       "      <th>labelDim</th>\n",
       "      <th>location_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>0</td>\n",
       "      <td>10.507685</td>\n",
       "      <td>36.740146</td>\n",
       "      <td>-109.854391</td>\n",
       "      <td>0</td>\n",
       "      <td>04001942600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>1</td>\n",
       "      <td>10.008235</td>\n",
       "      <td>36.752539</td>\n",
       "      <td>-109.366007</td>\n",
       "      <td>1</td>\n",
       "      <td>04001942700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>2</td>\n",
       "      <td>8.527029</td>\n",
       "      <td>35.955471</td>\n",
       "      <td>-109.145939</td>\n",
       "      <td>2</td>\n",
       "      <td>04001944000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>3</td>\n",
       "      <td>8.570941</td>\n",
       "      <td>36.386734</td>\n",
       "      <td>-109.313362</td>\n",
       "      <td>3</td>\n",
       "      <td>04001944100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>4</td>\n",
       "      <td>9.604968</td>\n",
       "      <td>36.171561</td>\n",
       "      <td>-109.686426</td>\n",
       "      <td>4</td>\n",
       "      <td>04001944201</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        time  locations       tmax        lat         lon  labelDim  \\\n",
       "0 2004-12-31          0  10.507685  36.740146 -109.854391         0   \n",
       "1 2004-12-31          1  10.008235  36.752539 -109.366007         1   \n",
       "2 2004-12-31          2   8.527029  35.955471 -109.145939         2   \n",
       "3 2004-12-31          3   8.570941  36.386734 -109.313362         3   \n",
       "4 2004-12-31          4   9.604968  36.171561 -109.686426         4   \n",
       "\n",
       "  location_label  \n",
       "0    04001942600  \n",
       "1    04001942700  \n",
       "2    04001944000  \n",
       "3    04001944100  \n",
       "4    04001944201  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "65fdb169",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: >"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGsCAYAAAAPJKchAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAArmUlEQVR4nO3df3SUVX7H8c8kJgMRBsEsBEL4sQLKD4P8EAy7CqxAxCzKtuVY2G5YqrTacA5uFJd4VkhkNSyISBUBDytx95ii2AO2imCKhhxKcE2EClgptEBUkiCrJCFZhjFz+4eHkTE/YCZD7szk/TpnDj73uXee7zfJJB+feWbGYYwxAgAAsCTGdgEAAKBjI4wAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqyIqjJSUlGjGjBnq06ePHA6Htm3bFtD63NxcORyOJrdrr7326hQMAAAuK6LCSH19vUaOHKm1a9cGtf7RRx9VZWWl323YsGGaNWtWiCsFAABXKqLCyPTp0/Xb3/5WP/vZz5rd73a79eijjyo5OVnXXnutxo8fr+LiYt/+Ll26KCkpyXerrq7WJ598ovvvv7+dOgAAAN8XUWHkchYsWKDS0lJt3rxZH3/8sWbNmqW77rpLR48ebXb+xo0bNWTIEN1+++3tXCkAALgoasJIRUWFNm3apC1btuj222/XDTfcoEcffVQ//vGPtWnTpibzz58/r1dffZWzIgAAWHaN7QJC5eDBg2psbNSQIUP8xt1ut66//vom87du3aq6ujrNnTu3vUoEAADNiJowcu7cOcXGxqq8vFyxsbF++7p06dJk/saNG/XTn/5UvXr1aq8SAQBAM6ImjIwaNUqNjY06ffr0Za8BOX78uN5//33927/9WztVBwAAWhJRYeTcuXM6duyYb/v48eM6cOCAevTooSFDhujnP/+5MjMztWrVKo0aNUpffvmldu3apdTUVGVkZPjWvfzyy+rdu7emT59uow0AAHAJhzHG2C7iShUXF2vy5MlNxufOnauCggJ5PB799re/1R/+8Ad98cUXSkxM1G233aa8vDzdfPPNkiSv16v+/fsrMzNTTz31VHu3AAAAvieiwggAAIg+UfPSXgAAEJkIIwAAwKqIuIDV6/Xq1KlT6tq1qxwOh+1yAADAFTDGqK6uTn369FFMTMvnPyIijJw6dUopKSm2ywAAAEH47LPP1Ldv3xb3R0QY6dq1q6Rvm3G5XEHdh8fj0bvvvqtp06YpLi4ulOWFBfqLbPQX+aK9R/qLbLb6q62tVUpKiu/veEsiIoxcfGrG5XK1KYwkJCTI5XJF7Q8a/UUu+ot80d4j/UU22/1d7hILLmAFAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBV19guAAAi0YDFb/ttO2ONVoyTRuTulLux9Y9Lt+XE8gzbJQDN4swIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwIKI+vWrVNqaqpcLpdcLpfS0tL0zjvvtDi/oKBADofD79apU6c2Fw0AAKJHQB+U17dvXy1fvlyDBw+WMUavvPKK7r33Xu3fv1/Dhw9vdo3L5dKRI0d82w5HeH6AFAAAsCOgMDJjxgy/7aeeekrr1q3Tvn37WgwjDodDSUlJwVcIAACiWkBh5FKNjY3asmWL6uvrlZaW1uK8c+fOqX///vJ6vRo9erSefvrpFoPLRW63W26327ddW1srSfJ4PPJ4PEHVe3FdsOvDHf1FNvqLPM5Y478dY/z+DUdt+fpH4/fwUvR3dY97OQ5jTECPnIMHDyotLU3nz59Xly5dVFhYqLvvvrvZuaWlpTp69KhSU1NV
U1OjZ555RiUlJTp8+LD69u3b4jFyc3OVl5fXZLywsFAJCQmBlAsAACxpaGjQnDlzVFNTI5fL1eK8gMPIhQsXVFFRoZqaGr3xxhvauHGjdu/erWHDhl12rcfj0dChQzV79mwtW7asxXnNnRlJSUnRmTNnWm3mcscuKirS1KlTFRcXF9R9hDP6i2z0F3lG5O7023bGGC0b69UTZTFye8Pz2rhDuelBr43G7+Gl6O/qqK2tVWJi4mXDSMBP08THx2vQoEGSpDFjxujDDz/UmjVrtGHDhsuujYuL06hRo3Ts2LFW5zmdTjmdzmbXt/WLGIr7CGf0F9noL3K4G5sPHG6vo8V9toXiax9N38Pm0F/oj3cl2vw+I16v1+8sRmsaGxt18OBB9e7du62HBQAAUSKgMyM5OTmaPn26+vXrp7q6OhUWFqq4uFg7d357ujIzM1PJycnKz8+XJD355JO67bbbNGjQIJ09e1YrV67UyZMn9cADD4S+EwAAEJECCiOnT59WZmamKisr1a1bN6Wmpmrnzp2aOnWqJKmiokIxMd+dbPn66681f/58VVVVqXv37hozZoz27t17RdeXAACAjiGgMPL73/++1f3FxcV+26tXr9bq1asDLgoAAHQcfDYNAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwKqAwsm7dOqWmpsrlcsnlciktLU3vvPNOq2u2bNmim266SZ06ddLNN9+s7du3t6lgAAAQXQIKI3379tXy5ctVXl6usrIy/eQnP9G9996rw4cPNzt/7969mj17tu6//37t379fM2fO1MyZM3Xo0KGQFA8AACJfQGFkxowZuvvuuzV48GANGTJETz31lLp06aJ9+/Y1O3/NmjW66667tGjRIg0dOlTLli3T6NGj9cILL4SkeAAAEPmuCXZhY2OjtmzZovr6eqWlpTU7p7S0VNnZ2X5j6enp2rZtW6v37Xa75Xa7fdu1tbWSJI/HI4/HE1S9F9cFuz7c0V9ko7/I44w1/tsxxu/fcNSWr380fg8vRX9X97iX4zDGBPTIOXjwoNLS0nT+/Hl16dJFhYWFuvvuu5udGx8fr1deeUWzZ8/2jb344ovKy8tTdXV1i8fIzc1VXl5ek/HCwkIlJCQEUi4AALCkoaFBc+bMUU1NjVwuV4vzAj4zcuONN+rAgQOqqanRG2+8oblz52r37t0aNmxYmwq+VE5Ojt8ZldraWqWkpGjatGmtNtMaj8ejoqIiTZ06VXFxcaEqNWzQX2Sjv8gzInen37YzxmjZWK+eKIuR2+uwVFXrDuWmB702Gr+Hl6K/q+PiMxuXE3AYiY+P16BBgyRJY8aM0Ycffqg1a9Zow4YNTeYmJSU1OQNSXV2tpKSkVo/hdDrldDqbjMfFxbX5ixiK+whn9BfZ6C9yuBubDxxur6PFfbaF4msfTd/D5tBf6I93Jdr8PiNer9fv+o5LpaWladeuXX5jRUVFLV5jAgAAOp6Azozk5ORo+vTp6tevn+rq6lRYWKji4mLt3Pnt6crMzEwlJycrPz9fkrRw4UJNnDhRq1atUkZGhjZv3qyysjK99NJLoe8EAABEpIDCyOnTp5WZmanKykp169ZNqamp2rlzp6ZOnSpJqqioUEzMdydbJkyYoMLCQv3mN7/R448/rsGDB2vbtm0aMWJEaLsAAAARK6Aw8vvf/77V/cXFxU3GZs2apVmzZgVUFAAA6Dj4bBoAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAA
YBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYFdDbwQPA1TBg8du2SwBgEWdGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgVUBhJD8/X7feequ6du2qnj17aubMmTpy5EirawoKCuRwOPxunTp1alPRAAAgegQURnbv3q2srCzt27dPRUVF8ng8mjZtmurr61td53K5VFlZ6budPHmyTUUDAIDocU0gk3fs2OG3XVBQoJ49e6q8vFx33HFHi+scDoeSkpKCqxAAAES1gMLI99XU1EiSevTo0eq8c+fOqX///vJ6vRo9erSefvppDR8+vMX5brdbbrfbt11bWytJ8ng88ng8QdV6cV2w68Md/UW2jt6fM9a0ZzlXhTPG+P0bjtry89XRf0Yjna3+rvR4DmNMUI8cr9ere+65R2fPntWePXtanFdaWqqjR48qNTVVNTU1euaZZ1RSUqLDhw+rb9++za7Jzc1VXl5ek/HCwkIlJCQEUy4AAGhnDQ0NmjNnjmpqauRyuVqcF3QYeeihh/TOO+9oz549LYaK5ng8Hg0dOlSzZ8/WsmXLmp3T3JmRlJQUnTlzptVmLnfcoqIiTZ06VXFxcUHdRzijv8jW0fsbkbvTQlWh5YwxWjbWqyfKYuT2OmyX06xDuelBr+3oP6ORzlZ/tbW1SkxMvGwYCeppmgULFuitt95SSUlJQEFEkuLi4jRq1CgdO3asxTlOp1NOp7PZtW39IobiPsIZ/UW2jtqfuzE8/3gHw+11hG0/ofjZ6qg/o9Givfu70mMF9GoaY4wWLFigrVu36r333tPAgQMDLqyxsVEHDx5U7969A14LAACiT0BnRrKyslRYWKg333xTXbt2VVVVlSSpW7du6ty5syQpMzNTycnJys/PlyQ9+eSTuu222zRo0CCdPXtWK1eu1MmTJ/XAAw+EuBUAABCJAgoj69atkyRNmjTJb3zTpk365S9/KUmqqKhQTMx3J1y+/vprzZ8/X1VVVerevbvGjBmjvXv3atiwYW2rHAAARIWAwsiVXOtaXFzst7169WqtXr06oKIAAEDHwWfTAAAAqwgjAADAqja9AysAIHIMWPx20GudsUYrxn37njDt+dLlE8sz2u1YsIczIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALAqoDCSn5+vW2+9VV27dlXPnj01c+ZMHTly5LLrtmzZoptuukmdOnXSzTffrO3btwddMAAAiC4BhZHdu3crKytL+/btU1FRkTwej6ZNm6b6+voW1+zdu1ezZ8/W/fffr/3792vmzJmaOXOmDh061ObiAQBA5LsmkMk7duzw2y4oKFDPnj1VXl6uO+64o9k1a9as0V133aVFixZJkpYtW6aioiK98MILWr9+fZBlAwCAaBFQGPm+mpoaSVKPHj1anFNaWqrs7Gy/sfT0dG3btq3FNW63W26327ddW1srSfJ4PPJ4PEHVenFdsOvDHf1Fto7enzPWtGc5V4Uzxvj9G21s9ddej4mO/hi82se9HIcxJqifLK/Xq3vuuUdnz57Vnj17WpwXHx+vV155RbNnz/aNvfjii8rLy1N1dXWza3Jzc5WXl9dkvLCwUAkJCcGUCwAA
2llDQ4PmzJmjmpoauVyuFucFfWYkKytLhw4dajWIBCsnJ8fvbEptba1SUlI0bdq0VptpjcfjUVFRkaZOnaq4uLhQlRo26C+ydfT+RuTutFBVaDljjJaN9eqJshi5vQ7b5YScrf4O5aa3y3E6+mPwarn4zMblBBVGFixYoLfeekslJSXq27dvq3OTkpKanAGprq5WUlJSi2ucTqecTmeT8bi4uDZ/EUNxH+GM/iJbR+3P3Rg9f7zdXkdU9fN97d1fez8eOupj8Goe70oE9GoaY4wWLFigrVu36r333tPAgQMvuyYtLU27du3yGysqKlJaWloghwYAAFEqoDMjWVlZKiws1JtvvqmuXbuqqqpKktStWzd17txZkpSZmank5GTl5+dLkhYuXKiJEydq1apVysjI0ObNm1VWVqaXXnopxK0AAIBIFNCZkXXr1qmmpkaTJk1S7969fbfXXnvNN6eiokKVlZW+7QkTJqiwsFAvvfSSRo4cqTfeeEPbtm3TiBEjQtcFAACIWAGdGbmSF94UFxc3GZs1a5ZmzZoVyKEAAEAHwWfTAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwIOIyUlJZoxY4b69Okjh8Ohbdu2tTq/uLhYDoejya2qqirYmgEAQBQJOIzU19dr5MiRWrt2bUDrjhw5osrKSt+tZ8+egR4aAABEoWsCXTB9+nRNnz494AP17NlT1113XcDrAABAdAs4jATrlltukdvt1ogRI5Sbm6sf/ehHLc51u91yu92+7draWkmSx+ORx+MJ6vgX1wW7PtzRX2Tr6P05Y017lnNVOGOM37/RxlZ/7fWY6OiPwat93MtxGGOC/slyOBzaunWrZs6c2eKcI0eOqLi4WGPHjpXb7dbGjRv1xz/+UR988IFGjx7d7Jrc3Fzl5eU1GS8sLFRCQkKw5QIAgHbU0NCgOXPmqKamRi6Xq8V5Vz2MNGfixInq16+f/vjHPza7v7kzIykpKTpz5kyrzbTG4/GoqKhIU6dOVVxcXFD3Ec7oL7J19P5G5O60UFVoOWOMlo316omyGLm9DtvlhJyt/g7lprfLcTr6Y/Bqqa2tVWJi4mXDSLs9TXOpcePGac+ePS3udzqdcjqdTcbj4uLa/EUMxX2EM/qLbB21P3dj9PzxdnsdUdXP97V3f+39eOioj8GrebwrYeV9Rg4cOKDevXvbODQAAAgzAZ8ZOXfunI4dO+bbPn78uA4cOKAePXqoX79+ysnJ0RdffKE//OEPkqTnnntOAwcO1PDhw3X+/Hlt3LhR7733nt59993QdQEAACJWwGGkrKxMkydP9m1nZ2dLkubOnauCggJVVlaqoqLCt//ChQt65JFH9MUXXyghIUGpqan6j//4D7/7AAAAHVfAYWTSpElq7ZrXgoICv+3HHntMjz32WMCFAQCAjoHPpgEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVdfYLgBAaA1Y/LbtEppwxhqtGCeNyN0pd6PDdjkAwgxnRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgVcBh
pKSkRDNmzFCfPn3kcDi0bdu2y64pLi7W6NGj5XQ6NWjQIBUUFARRKgAAiEYBh5H6+nqNHDlSa9euvaL5x48fV0ZGhiZPnqwDBw7o4Ycf1gMPPKCdO3cGXCwAAIg+Ab/p2fTp0zV9+vQrnr9+/XoNHDhQq1atkiQNHTpUe/bs0erVq5Wenh7o4QEAQJS56u/AWlpaqilTpviNpaen6+GHH25xjdvtltvt9m3X1tZKkjwejzweT1B1XFwX7PpwR3+RLZT9OWNNm+8j1Jwxxu/faBTtPdrqr70e8/yOubrHvRyHMSbonyyHw6GtW7dq5syZLc4ZMmSI5s2bp5ycHN/Y9u3blZGRoYaGBnXu3LnJmtzcXOXl5TUZLywsVEJCQrDlAgCAdtTQ0KA5c+aopqZGLperxXlh+dk0OTk5ys7O9m3X1tYqJSVF06ZNa7WZ1ng8HhUVFWnq1KmKi4sLValhg/4iWyj7G5EbftdjOWOMlo316omyGLm90fnZNNHeo63+DuW2z9P5/I65Oi4+s3E5Vz2MJCUlqbq62m+surpaLper2bMikuR0OuV0OpuMx8XFtfmLGIr7CGf0F9lC0V84fxCd2+sI6/pCIdp7bO/+2vvxzu+Y0B/vSlz19xlJS0vTrl27/MaKioqUlpZ2tQ8NAAAiQMBh5Ny5czpw4IAOHDgg6duX7h44cEAVFRWSvn2KJTMz0zf/wQcf1P/93//pscce06effqoXX3xRr7/+un71q1+FpgMAABDRAg4jZWVlGjVqlEaNGiVJys7O1qhRo7RkyRJJUmVlpS+YSNLAgQP19ttvq6ioSCNHjtSqVau0ceNGXtYLAAAkBXHNyKRJk9TaC3Cae3fVSZMmaf/+/YEeCgAAdAB8Ng0AALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACw6qp/ai8AAMEasPjtdjmOM9ZoxThpRO7ONn8q8YnlGSGqquPgzAgAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKuCCiNr167VgAED1KlTJ40fP15/+tOfWpxbUFAgh8Phd+vUqVPQBQMAgOgScBh57bXXlJ2draVLl+qjjz7SyJEjlZ6ertOnT7e4xuVyqbKy0nc7efJkm4oGAADRI+Aw8uyzz2r+/PmaN2+ehg0bpvXr1yshIUEvv/xyi2scDoeSkpJ8t169erWpaAAAED2uCWTyhQsXVF5erpycHN9YTEyMpkyZotLS0hbXnTt3Tv3795fX69Xo0aP19NNPa/jw4S3Od7vdcrvdvu3a2lpJksfjkcfjCaRkn4vrgl0f7ugvsoWyP2esafN9hJozxvj9G42ivUf6u3Lh+HvK1u/QKz2ewxhzxV/5U6dOKTk5WXv37lVaWppv/LHHHtPu3bv1wQcfNFlTWlqqo0ePKjU1VTU1NXrmmWdUUlKiw4cPq2/fvs0eJzc3V3l5eU3GCwsLlZCQcKXlAgAAixoaGjRnzhzV1NTI5XK1OC+gMyPBSEtL8wsuEyZM0NChQ7VhwwYtW7as2TU5OTnKzs72bdfW1iolJUXTpk1rtZnWeDweFRUVaerUqYqLiwvqPsIZ/UW2UPY3IndniKoKHWeM0bKxXj1RFiO312G7nKsi2nukvyt3KDc9RFWFjq3foRef2bicgMJIYmKiYmNjVV1d7TdeXV2tpKSkK7qPuLg4jRo1SseOHWtxjtPplNPpbHZtW7+IobiPcEZ/kS0U/bkbw/cPhdvrCOv6QiHae6S/ywvn31Ht/Tv0So8V0AWs8fHxGjNmjHbt2uUb83q92rVrl9/Zj9Y0Njbq
4MGD6t27dyCHBgAAUSrgp2mys7M1d+5cjR07VuPGjdNzzz2n+vp6zZs3T5KUmZmp5ORk5efnS5KefPJJ3XbbbRo0aJDOnj2rlStX6uTJk3rggQdC2wkAAIhIAYeR++67T19++aWWLFmiqqoq3XLLLdqxY4fv5boVFRWKifnuhMvXX3+t+fPnq6qqSt27d9eYMWO0d+9eDRs2LHRdAACAiBXUBawLFizQggULmt1XXFzst7169WqtXr06mMMAAIAOgM+mAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWBXUB+UBHcWAxW+3y3GcsUYrxkkjcnfK3ehol2MCQLjgzAgAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMCqa4JZtHbtWq1cuVJVVVUaOXKknn/+eY0bN67F+Vu2bNETTzyhEydOaPDgwfrd736nu+++O+iiAQAIVwMWv227hCacsUYrxkkjcnfK3ehosv/E8gwLVX0n4DDy2muvKTs7W+vXr9f48eP13HPPKT09XUeOHFHPnj2bzN+7d69mz56t/Px8/fSnP1VhYaFmzpypjz76SCNGjAhJEwh/bX1wXu6BBACIXAE/TfPss89q/vz5mjdvnoYNG6b169crISFBL7/8crPz16xZo7vuukuLFi3S0KFDtWzZMo0ePVovvPBCm4sHAACRL6AzIxcuXFB5eblycnJ8YzExMZoyZYpKS0ubXVNaWqrs7Gy/sfT0dG3btq3F47jdbrndbt92TU2NJOmrr76Sx+MJpGQfj8ejhoYG/fnPf1ZcXFxQ9xHOwr2/a76pb9t6r1FDg1fXeGLU6I2+MyP0F/mivUf6i2yX6+/Pf/7zVTluXV2dJMkY03p9gdzpmTNn1NjYqF69evmN9+rVS59++mmza6qqqpqdX1VV1eJx8vPzlZeX12R84MCBgZSLKDPHdgFXGf1Fvmjvkf4iW2v9Ja66useuq6tTt27dWtwf1AWsV1tOTo7f2RSv16uvvvpK119/vRyO4BJrbW2tUlJS9Nlnn8nlcoWq1LBBf5GN/iJftPdIf5HNVn/GGNXV1alPnz6tzgsojCQmJio2NlbV1dV+49XV1UpKSmp2TVJSUkDzJcnpdMrpdPqNXXfddYGU2iKXyxWVP2gX0V9ko7/IF+090l9ks9Ffa2dELgroAtb4+HiNGTNGu3bt8o15vV7t2rVLaWlpza5JS0vzmy9JRUVFLc4HAAAdS8BP02RnZ2vu3LkaO3asxo0bp+eee0719fWaN2+eJCkzM1PJycnKz8+XJC1cuFATJ07UqlWrlJGRoc2bN6usrEwvvfRSaDsBAAARKeAwct999+nLL7/UkiVLVFVVpVtuuUU7duzwXaRaUVGhmJjvTrhMmDBBhYWF+s1vfqPHH39cgwcP1rZt29r9PUacTqeWLl3a5OmfaEF/kY3+Il+090h/kS3c+3OYy73eBgAA4Cris2kAAIBVhBEAAGAVYQQAAFhFGAEAAFZFfRi555571K9fP3Xq1Em9e/fWL37xC506dcpvzscff6zbb79dnTp1UkpKilasWGGp2sCdOHFC999/vwYOHKjOnTvrhhtu0NKlS3XhwgW/eZHc41NPPaUJEyYoISGhxTe/q6ioUEZGhhISEtSzZ08tWrRI33zzTfsW2gZr167VgAED1KlTJ40fP15/+tOfbJcUlJKSEs2YMUN9+vSRw+Fo8hlUxhgtWbJEvXv3VufOnTVlyhQdPXrUTrFByM/P16233qquXbuqZ8+emjlzpo4cOeI35/z588rKytL111+vLl266K//
+q+bvPFjuFq3bp1SU1N9b4yVlpamd955x7c/kntrzvLly+VwOPTwww/7xiK5x9zcXDkcDr/bTTfd5Nsfzr1FfRiZPHmyXn/9dR05ckT/+q//qv/93//V3/zN3/j219bWatq0aerfv7/Ky8u1cuVK5ebmRsz7oHz66afyer3asGGDDh8+rNWrV2v9+vV6/PHHfXMivccLFy5o1qxZeuihh5rd39jYqIyMDF24cEF79+7VK6+8ooKCAi1ZsqSdKw3Oa6+9puzsbC1dulQfffSRRo4cqfT0dJ0+fdp2aQGrr6/XyJEjtXbt2mb3r1ixQv/8z/+s9evX64MPPtC1116r9PR0nT9/vp0rDc7u3buVlZWlffv2qaioSB6PR9OmTVN9/XcfBPmrX/1K//7v/64tW7Zo9+7dOnXqlP7qr/7KYtVXrm/fvlq+fLnKy8tVVlamn/zkJ7r33nt1+PBhSZHd2/d9+OGH2rBhg1JTU/3GI73H4cOHq7Ky0nfbs2ePb19Y92Y6mDfffNM4HA5z4cIFY4wxL774ounevbtxu92+Ob/+9a/NjTfeaKvENluxYoUZOHCgbztaety0aZPp1q1bk/Ht27ebmJgYU1VV5Rtbt26dcblcfj2Hq3HjxpmsrCzfdmNjo+nTp4/Jz8+3WFXbSTJbt271bXu9XpOUlGRWrlzpGzt79qxxOp3mX/7lXyxU2HanT582kszu3buNMd/2ExcXZ7Zs2eKb89///d9GkiktLbVVZpt0797dbNy4Map6q6urM4MHDzZFRUVm4sSJZuHChcaYyP/+LV261IwcObLZfeHeW9SfGbnUV199pVdffVUTJkxQXFycJKm0tFR33HGH4uPjffPS09N15MgRff3117ZKbZOamhr16NHDtx2NPV6qtLRUN998s9+nQ6enp6u2ttb3f3Th6sKFCyovL9eUKVN8YzExMZoyZYpKS0stVhZ6x48fV1VVlV+v3bp10/jx4yO215qaGknyPd7Ky8vl8Xj8erzpppvUr1+/iOuxsbFRmzdvVn19vdLS0qKqt6ysLGVkZPj1IkXH9+/o0aPq06ePfvjDH+rnP/+5KioqJIV/bx0ijPz617/Wtddeq+uvv14VFRV68803ffuqqqr8/ohJ8m1XVVW1a52hcOzYMT3//PP6x3/8R99YtPX4fZHc35kzZ9TY2Nhs/eFee6Au9hMtvXq9Xj388MP60Y9+5HtH6aqqKsXHxze5timSejx48KC6dOkip9OpBx98UFu3btWwYcOiojdJ2rx5sz766CPfR5ZcKtJ7HD9+vAoKCrRjxw6tW7dOx48f1+233666urqw7y0iw8jixYubXKTz/dunn37qm79o0SLt379f7777rmJjY5WZmSkT5m88G2iPkvTFF1/orrvu0qxZszR//nxLlV+ZYPoDwklWVpYOHTqkzZs32y4lpG688UYdOHBAH3zwgR566CHNnTtXn3zyie2yQuKzzz7TwoUL9eqrr6pTp062ywm56dOna9asWUpNTVV6erq2b9+us2fP6vXXX7dd2mUF/Nk04eCRRx7RL3/5y1bn/PCHP/T9d2JiohITEzVkyBANHTpUKSkp2rdvn9LS0pSUlNTkauKL20lJSSGv/UoF2uOpU6c0efJkTZgwocmFqeHYY6D9tSYpKanJq09s93elEhMTFRsb2+z3J9xrD9TFfqqrq9W7d2/feHV1tW655RZLVQVnwYIFeuutt1RSUqK+ffv6xpOSknThwgWdPXvW7/9AI+n7GR8fr0GDBkmSxowZow8//FBr1qzRfffdF/G9lZeX6/Tp0xo9erRvrLGxUSUlJXrhhRe0c+fOiO/xUtddd52GDBmiY8eOaerUqeHdm+2LVtrbyZMnjSTz/vvvG2O+u7jz4gWtxhiTk5MTURd3fv7552bw4MHmb//2b80333zTZH809GjM5S9gra6u9o1t2LDBuFwuc/78+XasMDjjxo0zCxYs8G03Njaa5OTkqL2A9Zln
nvGN1dTURNQFrF6v12RlZZk+ffqY//mf/2my/+JFgm+88YZv7NNPPw2biwSDMXnyZDN37tyo6K22ttYcPHjQ7zZ27Fjzd3/3d+bgwYNR0eOl6urqTPfu3c2aNWvCvreoDiP79u0zzz//vNm/f785ceKE2bVrl5kwYYK54YYbfH+kzp49a3r16mV+8YtfmEOHDpnNmzebhIQEs2HDBsvVX5nPP//cDBo0yNx5553m888/N5WVlb7bRZHe48mTJ83+/ftNXl6e6dKli9m/f7/Zv3+/qaurM8YY880335gRI0aYadOmmQMHDpgdO3aYH/zgByYnJ8dy5Vdm8+bNxul0moKCAvPJJ5+Yf/iHfzDXXXed36uDIkVdXZ3v+yPJPPvss2b//v3m5MmTxhhjli9fbq677jrz5ptvmo8//tjce++9ZuDAgeYvf/mL5cqvzEMPPWS6detmiouL/R5rDQ0NvjkPPvig6devn3nvvfdMWVmZSUtLM2lpaRarvnKLFy82u3fvNsePHzcff/yxWbx4sXE4HObdd981xkR2by259NU0xkR2j4888ogpLi42x48fN//5n/9ppkyZYhITE83p06eNMeHdW1SHkY8//thMnjzZ9OjRwzidTjNgwADz4IMPms8//9xv3n/913+ZH//4x8bpdJrk5GSzfPlySxUHbtOmTUZSs7dLRXKPc+fObba/i2e3jDHmxIkTZvr06aZz584mMTHRPPLII8bj8dgrOkDPP/+86devn4mPjzfjxo0z+/bts11SUN5///1mv1dz5841xnx7ZuGJJ54wvXr1Mk6n09x5553myJEjdosOQEuPtU2bNvnm/OUvfzH/9E//ZLp3724SEhLMz372M7//OQhnf//3f2/69+9v4uPjzQ9+8ANz5513+oKIMZHdW0u+H0Yiucf77rvP9O7d28THx5vk5GRz3333mWPHjvn2h3NvDmPC/EpOAAAQ1SLy1TQAACB6EEYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABY9f/TyQWf12uzNgAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df.MEAN_NONE_SPATIAL_NEIGHBORS.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "9582d806",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.rename(\n",
    "    columns={\n",
    "        \"MEAN_NONE_SPATIAL_NEIGHBORS\": \"tmax\",\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "c4fd5b01",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['time', 'locations', 'tmax', 'lat', 'lon', 'labelDim',\n",
       "       'location_label'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "e1bd88af",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.drop(columns=[\"lat\",\"lon\",\"locations\",\"labelDim\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "dafc6d3e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>tmax</th>\n",
       "      <th>location_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>10.507685</td>\n",
       "      <td>04001942600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>10.008235</td>\n",
       "      <td>04001942700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>8.527029</td>\n",
       "      <td>04001944000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>8.570941</td>\n",
       "      <td>04001944100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2004-12-31</td>\n",
       "      <td>9.604968</td>\n",
       "      <td>04001944201</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        time       tmax location_label\n",
       "0 2004-12-31  10.507685    04001942600\n",
       "1 2004-12-31  10.008235    04001942700\n",
       "2 2004-12-31   8.527029    04001944000\n",
       "3 2004-12-31   8.570941    04001944100\n",
       "4 2004-12-31   9.604968    04001944201"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "fd46b959",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_parquet(\"outputs/temp_merge_1.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "5d0170cb-ec15-45c2-9bcd-816ba59c12bf",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# tmax[\"MEAN_NONE_SPATIAL_NEIGHBORS\"].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "76da3821-2f44-4fb6-a0f0-2a5c6854e019",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# tmin = xr.open_dataset(\"data/esri/MinTemp_2006_2021_Cali.nc\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "876b28e5-81e7-4d2d-9ce4-b56c6d0ad56b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# rhum = xr.open_dataset(\"data/esri/RelHum_2006_2021_Cali.nc\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e4199cad-b239-4464-909f-5999a85d7bc5",
   "metadata": {},
   "source": [
    "## Join tmax, tmin and rhum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "ee8d8863-3a41-4082-997d-258a23d99123",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# df = xr.merge([tmax, tmin[[\"MIN_TEMPERATURE_NONE_SPATIAL_NEIGHBORS\"]]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "66e2f9f0-bacf-463c-be30-d379b27b4215",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# df = xr.merge([df, rhum[[\"RELATIVE_HUMIDITY_NONE_SPATIAL_NEIGHBORS\"]]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "12380552-a76d-40fc-a37b-ff626e14218e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# del tmin\n",
    "# del tmax\n",
    "# del rhum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "3ef7a5cc-94f7-4d0e-b9e1-f12868d7f3b3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# df = df[\n",
    "#     [\n",
    "#         \"FIPS\",\n",
    "#         \"MAX_TEMPERATURE_NONE_SPATIAL_NEIGHBORS\",\n",
    "#         \"MIN_TEMPERATURE_NONE_SPATIAL_NEIGHBORS\",\n",
    "#         \"RELATIVE_HUMIDITY_NONE_SPATIAL_NEIGHBORS\",\n",
    "#     ]\n",
    "# ].to_dataframe()\n",
    "\n",
    "# df = df.rename(\n",
    "#     columns={\n",
    "#         \"MAX_TEMPERATURE_NONE_SPATIAL_NEIGHBORS\": \"tmax\",\n",
    "#         \"MIN_TEMPERATURE_NONE_SPATIAL_NEIGHBORS\": \"tmin\",\n",
    "#         \"RELATIVE_HUMIDITY_NONE_SPATIAL_NEIGHBORS\": \"rhum\",\n",
    "#     }\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "73c06547",
   "metadata": {},
   "outputs": [],
   "source": [
    "wf = xr.open_dataset(\"data/wc/Wildfireday_daily_CensusTract_2006_2021_WesternUS.nc\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "76fe33ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "locs = wf[[\"location_label\"]].to_dataframe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "b18989e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "wf = wf[[\"FIREDAY_NONE_ZEROS\"]].to_dataframe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9d8f43bf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "FIREDAY_NONE_ZEROS\n",
       "0.0    105698359\n",
       "1.0       124793\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wf.FIREDAY_NONE_ZEROS.value_counts(dropna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "65c99d17",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18108"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count distinct locations. In top-to-bottom order `wf` has not been through\n",
    "# reset_index() yet (that happens in a later cell), so `locations` is read\n",
    "# from the index rather than via attribute access, which only works on columns.\n",
    "wf.index.get_level_values(\"locations\").nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "05e8a83e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn the index levels of both frames back into ordinary columns.\n",
    "wf, locs = wf.reset_index(), locs.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "a32a79d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the location table to every wildfire row (left join keeps all of `wf`).\n",
    "df = pd.merge(\n",
    "    wf,\n",
    "    locs,\n",
    "    how=\"left\",\n",
    "    left_on=\"locations\",\n",
    "    right_on=\"labelDim\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "7d0483ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Discard the coordinate and join-key columns.\n",
    "df = df.drop([\"lat\", \"lon\", \"locations\", \"labelDim\"], axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "f143a142",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give the fire-day flag a short column name.\n",
    "df = df.rename(columns={\"FIREDAY_NONE_ZEROS\": \"wfday\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "f5219a29",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>wfday</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>location_label</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2005-12-31</th>\n",
       "      <th>04001942600</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001942700</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944000</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944100</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944201</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           wfday\n",
       "time       location_label       \n",
       "2005-12-31 04001942600       0.0\n",
       "           04001942700       0.0\n",
       "           04001944000       0.0\n",
       "           04001944100       0.0\n",
       "           04001944201       0.0"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "34fd9f7b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "105823152"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "ebf8b0f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per (time, location_label): keep the first non-null value of each column.\n",
    "# The grouping keys become the index of the result.\n",
    "df = df.groupby([\"time\", \"location_label\"]).first()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "b63c7b5f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "105823152"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "f57e2788",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_parquet(\"outputs/temp_merge_2.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "d01d359e",
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = pd.read_parquet(\"outputs/temp_merge_1.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "24f5226a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_parquet(\"outputs/temp_merge_2.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "5c654b8d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "112432572"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "73d75c23",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index `df2` by (time, location_label), keeping the first non-null value per group,\n",
    "# so it can be joined on the same index as `df`.\n",
    "df2 = df2.groupby([\"time\", \"location_label\"]).first()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "3a49e822",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index-on-index join; `how=\"left\"` is the default, spelled out so it is clear\n",
    "# that every row of `df` is kept.\n",
    "df = df.join(df2, how=\"left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "c29a8fae-2fb1-41d6-b54a-4b67b5335c5a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# df = xr.merge([tmax, wf[[\"FIREDAY_NONE_ZEROS\"]]])\n",
    "# df = df[\n",
    "#     [\n",
    "#         \"FIPS\",\n",
    "#         \"MEAN_NONE_SPATIAL_NEIGHBORS\",\n",
    "#         \"FIREDAY_NONE_ZEROS\",\n",
    "#     ]\n",
    "# ].to_dataframe()\n",
    "\n",
    "# df = df.rename(\n",
    "#     columns={\n",
    "#         \"MEAN_NONE_SPATIAL_NEIGHBORS\": \"tmax\",\n",
    "#         \"FIREDAY_NONE_ZEROS\": \"wfday\",\n",
    "#     }\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "897fe035",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_parquet(\"outputs/temp_merge_3.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "404f7a1f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>wfday</th>\n",
       "      <th>tmax</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>location_label</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2005-12-31</th>\n",
       "      <th>04001942600</th>\n",
       "      <td>0.0</td>\n",
       "      <td>12.056090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001942700</th>\n",
       "      <td>0.0</td>\n",
       "      <td>11.032996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944000</th>\n",
       "      <td>0.0</td>\n",
       "      <td>9.850354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944100</th>\n",
       "      <td>0.0</td>\n",
       "      <td>9.774376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944201</th>\n",
       "      <td>0.0</td>\n",
       "      <td>11.232711</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           wfday       tmax\n",
       "time       location_label                  \n",
       "2005-12-31 04001942600       0.0  12.056090\n",
       "           04001942700       0.0  11.032996\n",
       "           04001944000       0.0   9.850354\n",
       "           04001944100       0.0   9.774376\n",
       "           04001944201       0.0  11.232711"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "f000de13-6966-4391-9b12-dec5d3107488",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Keep rows whose `time` index level is 2006-01-01 or later, for all locations.\n",
    "df = df.loc[pd.IndexSlice[pd.Timestamp(\"2006-01-01\"):, :], :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "8fedc0a5-f951-46ca-841d-f742e83d213b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Keep rows whose `time` index level is 2020-12-31 or earlier (label slices are inclusive).\n",
    "df = df.loc[pd.IndexSlice[:pd.Timestamp(\"2020-12-31\"), :], :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "353e5996-6e80-4417-98b5-3a7de386581c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df.to_parquet(\"outputs/temp_merge_4.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02292928-b92c-46cf-92c5-600ac916d378",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df = pd.read_parquet(\"outputs/temp_merge_4.parquet\", engine=\"pyarrow\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ff0d0e04",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>wfday</th>\n",
       "      <th>tmax</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>location_label</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2006-01-01</th>\n",
       "      <th>04001942600</th>\n",
       "      <td>0.0</td>\n",
       "      <td>15.978498</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001942700</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.836937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944000</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.192923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944100</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.254541</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944201</th>\n",
       "      <td>0.0</td>\n",
       "      <td>15.200634</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           wfday       tmax\n",
       "time       location_label                  \n",
       "2006-01-01 04001942600       0.0  15.978498\n",
       "           04001942700       0.0  14.836937\n",
       "           04001944000       0.0  14.192923\n",
       "           04001944100       0.0  14.254541\n",
       "           04001944201       0.0  15.200634"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "b2503ebc-c3a0-44a6-a2e8-8f6aea83033b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#df = df.groupby(by=[\"time\", \"lat\", \"lon\"]).first()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "a1a7424b-e780-43bd-bad7-be936758763f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>wfday</th>\n",
       "      <th>tmax</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>location_label</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2006-01-01</th>\n",
       "      <th>04001942600</th>\n",
       "      <td>0.0</td>\n",
       "      <td>15.978498</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001942700</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.836937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944000</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.192923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944100</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.254541</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944201</th>\n",
       "      <td>0.0</td>\n",
       "      <td>15.200634</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           wfday       tmax\n",
       "time       location_label                  \n",
       "2006-01-01 04001942600       0.0  15.978498\n",
       "           04001942700       0.0  14.836937\n",
       "           04001944000       0.0  14.192923\n",
       "           04001944100       0.0  14.254541\n",
       "           04001944201       0.0  15.200634"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "8aa3d145-6e4d-4f44-9c8a-8476dbd98b0a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "wfday    0\n",
       "tmax     0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isna().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ea18f227-d75c-4764-93bd-9afcf9190c2b",
   "metadata": {},
   "source": [
    "## Add PM 2.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "47b71a2f-3f8a-4fbc-a1a5-9d3be4344bcb",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# pm25 = xr.open_dataset(\"data/esri/PM25_Nature_2006_2020_Cali.nc\")\n",
    "# df_pm25 = pm25[[\"MEAN_NONE_SPATIAL_NEIGHBORS\"]].to_dataframe()\n",
    "# df_pm25 = df_pm25.rename(columns={\"MEAN_NONE_SPATIAL_NEIGHBORS\": \"pm25\"})\n",
    "# df_pm25 = df_pm25.reset_index()\n",
    "# df_pm25 = df_pm25.groupby(by=[\"time\", \"lat\", \"lon\"]).first()\n",
    "# df_pm25.head()\n",
    "# df.head()\n",
    "# df = df.join(df_pm25, rsuffix=\"_pm25\").progress_apply(lambda x: x)\n",
    "# df.to_parquet(\"outputs/temp_merge_3.parquet\")\n",
    "# df.isna().sum()\n",
    "# df.head()\n",
    "# len(df.FIPS.unique())\n",
    "# df = df.reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c61462f2-4d61-4195-905a-e59380dba392",
   "metadata": {},
   "source": [
    "## Add crosswalk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "764ae365-911c-42f9-9343-53b049cad571",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>STATE_ABBR</th>\n",
       "      <th>STATE_FIPS</th>\n",
       "      <th>COUNTY_FIP</th>\n",
       "      <th>STCOFIPS</th>\n",
       "      <th>TRACT_FIPS</th>\n",
       "      <th>FIPS</th>\n",
       "      <th>POPULATION</th>\n",
       "      <th>POP_SQMI</th>\n",
       "      <th>SQMI</th>\n",
       "      <th>Shape_Leng</th>\n",
       "      <th>Shape_Area</th>\n",
       "      <th>geometry</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AZ</td>\n",
       "      <td>04</td>\n",
       "      <td>001</td>\n",
       "      <td>04001</td>\n",
       "      <td>942600</td>\n",
       "      <td>04001942600</td>\n",
       "      <td>1549</td>\n",
       "      <td>2.6</td>\n",
       "      <td>589.09</td>\n",
       "      <td>232812.475761</td>\n",
       "      <td>1.525731e+09</td>\n",
       "      <td>POLYGON ((-1218270.401 32832.246, -1211358.143...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AZ</td>\n",
       "      <td>04</td>\n",
       "      <td>001</td>\n",
       "      <td>04001</td>\n",
       "      <td>942700</td>\n",
       "      <td>04001942700</td>\n",
       "      <td>4491</td>\n",
       "      <td>3.9</td>\n",
       "      <td>1155.35</td>\n",
       "      <td>494105.485641</td>\n",
       "      <td>2.992333e+09</td>\n",
       "      <td>POLYGON ((-1152995.229 23611.170, -1151650.010...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AZ</td>\n",
       "      <td>04</td>\n",
       "      <td>001</td>\n",
       "      <td>04001</td>\n",
       "      <td>944000</td>\n",
       "      <td>04001944000</td>\n",
       "      <td>5348</td>\n",
       "      <td>17.5</td>\n",
       "      <td>305.81</td>\n",
       "      <td>189340.223564</td>\n",
       "      <td>7.920447e+08</td>\n",
       "      <td>POLYGON ((-1163367.996 -71037.222, -1163356.94...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AZ</td>\n",
       "      <td>04</td>\n",
       "      <td>001</td>\n",
       "      <td>04001</td>\n",
       "      <td>944100</td>\n",
       "      <td>04001944100</td>\n",
       "      <td>5495</td>\n",
       "      <td>7.8</td>\n",
       "      <td>708.25</td>\n",
       "      <td>396654.409548</td>\n",
       "      <td>1.834355e+09</td>\n",
       "      <td>POLYGON ((-1174876.260 -9365.197, -1174863.596...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AZ</td>\n",
       "      <td>04</td>\n",
       "      <td>001</td>\n",
       "      <td>04001</td>\n",
       "      <td>944201</td>\n",
       "      <td>04001944201</td>\n",
       "      <td>4021</td>\n",
       "      <td>20.7</td>\n",
       "      <td>193.93</td>\n",
       "      <td>116350.697095</td>\n",
       "      <td>5.022734e+08</td>\n",
       "      <td>POLYGON ((-1210854.946 -45587.815, -1210757.05...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  STATE_ABBR STATE_FIPS COUNTY_FIP STCOFIPS TRACT_FIPS         FIPS  \\\n",
       "0         AZ         04        001    04001     942600  04001942600   \n",
       "1         AZ         04        001    04001     942700  04001942700   \n",
       "2         AZ         04        001    04001     944000  04001944000   \n",
       "3         AZ         04        001    04001     944100  04001944100   \n",
       "4         AZ         04        001    04001     944201  04001944201   \n",
       "\n",
       "   POPULATION  POP_SQMI     SQMI     Shape_Leng    Shape_Area  \\\n",
       "0        1549       2.6   589.09  232812.475761  1.525731e+09   \n",
       "1        4491       3.9  1155.35  494105.485641  2.992333e+09   \n",
       "2        5348      17.5   305.81  189340.223564  7.920447e+08   \n",
       "3        5495       7.8   708.25  396654.409548  1.834355e+09   \n",
       "4        4021      20.7   193.93  116350.697095  5.022734e+08   \n",
       "\n",
       "                                            geometry  \n",
       "0  POLYGON ((-1218270.401 32832.246, -1211358.143...  \n",
       "1  POLYGON ((-1152995.229 23611.170, -1151650.010...  \n",
       "2  POLYGON ((-1163367.996 -71037.222, -1163356.94...  \n",
       "3  POLYGON ((-1174876.260 -9365.197, -1174863.596...  \n",
       "4  POLYGON ((-1210854.946 -45587.815, -1210757.05...  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import geopandas as gpd\n",
    "\n",
    "# Read the census-tract shapefile and preview its first rows.\n",
    "# NOTE(review): the FIPS-based merge of these tracts into `df` that used to follow\n",
    "# here was commented out, so this cell only loads and displays `gdf`.\n",
    "gdf = gpd.read_file(\"data/WesternUSCensusTract/CensusTract2020_WesternUS.shp\")\n",
    "gdf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fa7175c9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['AZ', 'CA', 'CO', 'ID', 'MT', 'NV', 'NM', 'OR', 'TX', 'UT', 'WA',\n",
       "       'WY'], dtype=object)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gdf[\"STATE_ABBR\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "33fafda3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18108"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gdf[\"FIPS\"].nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "146288b1-27f0-461f-8550-e3e454b761ec",
   "metadata": {},
   "source": [
    "## Join smoke PM2.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74f56b79-ce34-4acc-a7fe-401b6ce87678",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# smoke_df = pd.read_parquet(\"outputs/smoke_pm25_predicted_with_fips.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "33a947f5-3b26-4ce7-ad9d-499b5c83356b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Load the smoke PM table (FIPS, smoke_pm, date columns per the preview below).\n",
    "smoke_df = pd.read_parquet(\"data/smoke_pm/smoke_pm_all_time_2020_map.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "fa968eb0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>FIPS</th>\n",
       "      <th>smoke_pm</th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>04001942600</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2006-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>04001942700</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2006-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>04001944000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2006-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>04001944100</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2006-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>04001944201</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2006-01-01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          FIPS  smoke_pm       date\n",
       "0  04001942600       0.0 2006-01-01\n",
       "1  04001942700       0.0 2006-01-01\n",
       "2  04001944000       0.0 2006-01-01\n",
       "3  04001944100       0.0 2006-01-01\n",
       "4  04001944201       0.0 2006-01-01"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "smoke_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ffd4e70c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18108"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of distinct FIPS codes; dropna=False counts a missing value as one entry,\n",
    "# matching what len(unique()) reports.\n",
    "smoke_df[\"FIPS\"].nunique(dropna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9cb24aec-b5dc-48a4-a8af-ae76cd5960b0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Use the same tract-key name as the index of `df`.\n",
    "smoke_df = smoke_df.rename({\"FIPS\": \"location_label\"}, axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "08e18194",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the same date-key name as the index of `df`.\n",
    "smoke_df = smoke_df.rename({\"date\": \"time\"}, axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "982bf5fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index the smoke table by (time, location_label), keeping the first non-null\n",
    "# smoke_pm value per group.\n",
    "smoke_df = smoke_df.groupby([\"time\", \"location_label\"]).first()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2e5258d6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>smoke_pm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>location_label</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2006-01-01</th>\n",
       "      <th>04001942600</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001942700</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944000</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944100</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944201</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           smoke_pm\n",
       "time       location_label          \n",
       "2006-01-01 04001942600          0.0\n",
       "           04001942700          0.0\n",
       "           04001944000          0.0\n",
       "           04001944100          0.0\n",
       "           04001944201          0.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "smoke_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70738985-90bf-4628-8dcf-8d838dcb55bd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "01956ce7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index-on-index join on (time, location_label); `how=\"left\"` is the default,\n",
    "# spelled out so it is clear that every row of `df` is kept.\n",
    "# NOTE(review): a later cell reports len(smoke_df) == 97,946,172, fewer than the\n",
    "# 99,213,732 shown by the neighbouring length check; if that second figure is\n",
    "# `merged`, the unmatched rows carry NaN smoke_pm. Confirm this is handled downstream.\n",
    "merged = df.join(smoke_df, how=\"left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3dd20720-c1af-4bdd-a77d-f236f758f4ff",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# df[\"FIPS_1\"] = df[\"FIPS_1\"].astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9ee3442f-1cf9-4a37-bf63-64a8aa09a6e2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# merged = df.merge(smoke_df, left_on=[\"FIPS_1\", \"time\"], right_on=[\"GEOID\", \"date\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "50e154bc-61ae-4f02-84d6-569dc06efb91",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>wfday</th>\n",
       "      <th>tmax</th>\n",
       "      <th>smoke_pm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>location_label</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2006-01-01</th>\n",
       "      <th>04001942600</th>\n",
       "      <td>0.0</td>\n",
       "      <td>15.978498</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001942700</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.836937</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944000</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.192923</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944100</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.254541</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944201</th>\n",
       "      <td>0.0</td>\n",
       "      <td>15.200634</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           wfday       tmax  smoke_pm\n",
       "time       location_label                            \n",
       "2006-01-01 04001942600       0.0  15.978498       0.0\n",
       "           04001942700       0.0  14.836937       0.0\n",
       "           04001944000       0.0  14.192923       0.0\n",
       "           04001944100       0.0  14.254541       0.0\n",
       "           04001944201       0.0  15.200634       0.0"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "bfa66f96",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "97946172"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(smoke_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "93a66a3d-fb93-4245-9a82-64a0a0a8b185",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "99213732"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(merged)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72c3f98c-39d4-4e10-85e8-bc1bd7da54c8",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# merged = merged.drop(\n",
    "#     columns=[\"lat\", \"lon\", \"locations\", \"FIPS\", \"locations_pm25\", \"date\"]\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "f927ed2b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>wfday</th>\n",
       "      <th>tmax</th>\n",
       "      <th>smoke_pm</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>location_label</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2006-01-01</th>\n",
       "      <th>04001942600</th>\n",
       "      <td>0.0</td>\n",
       "      <td>15.978498</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001942700</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.836937</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944000</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.192923</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944100</th>\n",
       "      <td>0.0</td>\n",
       "      <td>14.254541</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>04001944201</th>\n",
       "      <td>0.0</td>\n",
       "      <td>15.200634</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           wfday       tmax  smoke_pm\n",
       "time       location_label                            \n",
       "2006-01-01 04001942600       0.0  15.978498       0.0\n",
       "           04001942700       0.0  14.836937       0.0\n",
       "           04001944000       0.0  14.192923       0.0\n",
       "           04001944100       0.0  14.254541       0.0\n",
       "           04001944201       0.0  15.200634       0.0"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "8f805680-aaca-4f26-8a69-be777cb9c632",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "merged.to_parquet(\"outputs/final_merge_5_western_us.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e1ca435-b9b7-4fb0-8e73-3137ddefafeb",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
